{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22岁</td>\n",
       "      <td>162cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31岁</td>\n",
       "      <td>161cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23岁</td>\n",
       "      <td>158cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24岁</td>\n",
       "      <td>160cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22岁</td>\n",
       "      <td>168cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0      1   2            3        4   5       6   \\\n",
       "0  22岁  162cm  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31岁  161cm  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23岁  158cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24岁  160cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22岁  168cm  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                  7         8            9   \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "   10  11       12  13  14  \n",
       "0  不限  不限      台湾省  不限  不限  \n",
       "1  不限  不限  澳门特别行政区  不限  不限  \n",
       "2  不限  不限      台湾省  不限  不限  \n",
       "3  不限  不限      台湾省  不限  不限  \n",
       "4  不限  不限  香港特别行政区  不限  不限  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import jieba\n",
    "\n",
    "\n",
    "# Load the raw sample. The file is decoded as GBK and has no header row,\n",
    "# so pandas assigns integer column labels.\n",
    "df = pd.read_csv(\"sample.csv\", header=None, encoding=\"gbk\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置行列索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22岁</td>\n",
       "      <td>162cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31岁</td>\n",
       "      <td>161cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23岁</td>\n",
       "      <td>158cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24岁</td>\n",
       "      <td>160cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22岁</td>\n",
       "      <td>168cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    年龄     身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22岁  162cm  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31岁  161cm  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23岁  158cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24岁  160cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22岁  168cm  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房  \n",
       "0   不限   不限      台湾省   不限   不限  \n",
       "1   不限   不限  澳门特别行政区   不限   不限  \n",
       "2   不限   不限      台湾省   不限   不限  \n",
       "3   不限   不限      台湾省   不限   不限  \n",
       "4   不限   不限  香港特别行政区   不限   不限  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row labels: a plain 0..n-1 range.\n",
    "df.index = pd.RangeIndex(len(df))\n",
    "\n",
    "# Column labels: the member's own attributes first, then the matching\n",
    "# '对象…' (partner) columns.\n",
    "df.columns = [\n",
    "    '年龄', '身高', '学历', '工资', '家乡', '婚姻', '住房', '自我介绍',\n",
    "    '对象年龄', '对象身高', '对象学历', '对象薪水', '对象家乡', '对象婚姻', '对象住房',\n",
    "]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看是否有空值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "年龄      False\n",
       "身高      False\n",
       "学历      False\n",
       "工资      False\n",
       "家乡      False\n",
       "婚姻      False\n",
       "住房      False\n",
       "自我介绍    False\n",
       "对象年龄    False\n",
       "对象身高    False\n",
       "对象学历    False\n",
       "对象薪水    False\n",
       "对象家乡    False\n",
       "对象婚姻    False\n",
       "对象住房    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True when the column holds at least one missing value.\n",
    "df.isna().any(axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 去重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去重前数据量： (2875, 15)\n",
      "去重后数据量： (2827, 15)\n"
     ]
    }
   ],
   "source": [
    "# Drop fully duplicated rows in place, printing the frame's shape before and after.\n",
    "shape_before = df.shape\n",
    "print('去重前数据量：', shape_before)\n",
    "df.drop_duplicates(inplace=True)\n",
    "shape_after = df.shape\n",
    "print('去重后数据量：', shape_after)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 把年龄中的“岁”去掉"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160cm</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168cm</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄     身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162cm  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161cm  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160cm  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168cm  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房  \n",
       "0   不限   不限      台湾省   不限   不限  \n",
       "1   不限   不限  澳门特别行政区   不限   不限  \n",
       "2   不限   不限      台湾省   不限   不限  \n",
       "3   不限   不限      台湾省   不限   不限  \n",
       "4   不限   不限  香港特别行政区   不限   不限  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove the '岁' unit by name rather than by position, so ages that are not\n",
    "# exactly two characters long are kept intact and re-running the cell is safe.\n",
    "# The column stays string-typed.\n",
    "df['年龄'] = df['年龄'].str.replace('岁', '', regex=False)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 把身高中的“cm”去掉，以便后续处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房  \n",
       "0   不限   不限      台湾省   不限   不限  \n",
       "1   不限   不限  澳门特别行政区   不限   不限  \n",
       "2   不限   不限      台湾省   不限   不限  \n",
       "3   不限   不限      台湾省   不限   不限  \n",
       "4   不限   不限  香港特别行政区   不限   不限  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove the 'cm' unit by name rather than by position, so values that are\n",
    "# not exactly three characters long are kept intact and re-running the cell\n",
    "# is safe. The vectorised string replace needs no per-row regex loop.\n",
    "# The column stays string-typed.\n",
    "df['身高'] = df['身高'].str.replace('cm', '', regex=False)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "家乡\n",
       "Baden-Wurttemberg      1\n",
       "Basse-Normandie        1\n",
       "Bayern                 1\n",
       "England                1\n",
       "Ontario                3\n",
       "Stockholm              1\n",
       "Waikato                1\n",
       "上海市                   82\n",
       "云南省                   72\n",
       "以后告诉你                206\n",
       "内蒙古自治区               131\n",
       "加拿大                    1\n",
       "北京市                  100\n",
       "台湾省                  149\n",
       "吉林省                  109\n",
       "四川省                   45\n",
       "天津市                   43\n",
       "宁夏回族自治区               28\n",
       "安徽省                   46\n",
       "山东省                   59\n",
       "山西省                  100\n",
       "广东省                   49\n",
       "广西壮族自治区               24\n",
       "德国                     1\n",
       "新加坡                    4\n",
       "新南威尔士州                 1\n",
       "新疆自治区                 33\n",
       "日本                     3\n",
       "曼谷                     1\n",
       "江苏省                   87\n",
       "江西省                   70\n",
       "河北省                  113\n",
       "河南省                   33\n",
       "浙江省                   57\n",
       "海南省                   44\n",
       "湖北省                   47\n",
       "湖南省                   95\n",
       "澳大利亚                   2\n",
       "澳门特别行政区              130\n",
       "甘肃省                   35\n",
       "神奈川县                   1\n",
       "福建省                   40\n",
       "维多利亚州                  1\n",
       "缅甸                     1\n",
       "美国                     1\n",
       "英国                     1\n",
       "西藏自治区                246\n",
       "贵州省                   43\n",
       "越南                     1\n",
       "辽宁省                  151\n",
       "重庆市                   23\n",
       "钓鱼岛                   10\n",
       "陕西省                   54\n",
       "青海省                   49\n",
       "首尔（汉城）                 3\n",
       "香港特别行政区               77\n",
       "黑龙江省                 116\n",
       "dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many rows fall under each distinct '家乡' value.\n",
    "df.groupby('家乡').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房  \n",
       "0   不限   不限      台湾省   不限   不限  \n",
       "1   不限   不限  澳门特别行政区   不限   不限  \n",
       "2   不限   不限      台湾省   不限   不限  \n",
       "3   不限   不限      台湾省   不限   不限  \n",
       "4   不限   不限  香港特别行政区   不限   不限  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of df (head() returns five rows by default)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对工资进行处理：拆分为最低工资和最高工资"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对工资进行处理\n",
    "def get_salary_max_min(salary):\n",
    "    try:\n",
    "        result = re.split('-', salary)\n",
    "        return result\n",
    "    except:\n",
    "        return salary\n",
    "# Build the per-row list of pieces, then fan it out into two columns.\n",
    "salary = df['工资'].apply(get_salary_max_min)\n",
    "# Rows whose salary has no '-' yield a one-element list, so their upper bound is NaN here.\n",
    "df['最低工资'], df['最高工资'] = salary.str.get(0), salary.str.get(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2827 entries, 0 to 2874\n",
      "Data columns (total 17 columns):\n",
      " #   Column  Non-Null Count  Dtype \n",
      "---  ------  --------------  ----- \n",
      " 0   年龄      2827 non-null   object\n",
      " 1   身高      2827 non-null   object\n",
      " 2   学历      2827 non-null   object\n",
      " 3   工资      2827 non-null   object\n",
      " 4   家乡      2827 non-null   object\n",
      " 5   婚姻      2827 non-null   object\n",
      " 6   住房      2827 non-null   object\n",
      " 7   自我介绍    2827 non-null   object\n",
      " 8   对象年龄    2827 non-null   object\n",
      " 9   对象身高    2827 non-null   object\n",
      " 10  对象学历    2827 non-null   object\n",
      " 11  对象薪水    2827 non-null   object\n",
      " 12  对象家乡    2827 non-null   object\n",
      " 13  对象婚姻    2827 non-null   object\n",
      " 14  对象住房    2827 non-null   object\n",
      " 15  最低工资    2827 non-null   object\n",
      " 16  最高工资    2370 non-null   object\n",
      "dtypes: object(17)\n",
      "memory usage: 477.5+ KB\n"
     ]
    }
   ],
   "source": [
    "# List each column's dtype and non-null count for df\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **处理不符合规范、带有中文的工资数据（例如“2000以下”）**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房   最低工资   最高工资  \n",
       "0   不限   不限      台湾省   不限   不限  10000  20000  \n",
       "1   不限   不限  澳门特别行政区   不限   不限   5000  10000  \n",
       "2   不限   不限      台湾省   不限   不限  10000  20000  \n",
       "3   不限   不限      台湾省   不限   不限  10000  20000  \n",
       "4   不限   不限  香港特别行政区   不限   不限   5000  10000  "
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose salary was the open-ended '2000以下' carry that text as the lower bound;\n",
    "# replace both bounds with the plain number string '2000'.\n",
    "indexs = df.index[df['最低工资'] == '2000以下']\n",
    "df.loc[indexs, ['最低工资', '最高工资']] = '2000'\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>51</td>\n",
       "      <td>163</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>需要时购置</td>\n",
       "      <td>\\n                    岁月在流逝，只想找个和我相携一生的伴侣，那是最美...</td>\n",
       "      <td>50 - 60岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>54</td>\n",
       "      <td>172</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>吉林省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>住亲朋家</td>\n",
       "      <td>\\n                    也许我很平凡，但是我绝不缺乏生活的热情和生命的梦...</td>\n",
       "      <td>53 - 63岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>吉林省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>177</th>\n",
       "      <td>33</td>\n",
       "      <td>175</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>27 - 38岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td>72</td>\n",
       "      <td>169</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>江西省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>21 - 32岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>江西省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>207</th>\n",
       "      <td>58</td>\n",
       "      <td>180</td>\n",
       "      <td>初中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(有贷款)</td>\n",
       "      <td>\\n                    真想把“爱情“抓到面前来，揪住它的衣领恶狠狠的对...</td>\n",
       "      <td>58 - 68岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>255</th>\n",
       "      <td>31</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>云南省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>30 - 40岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>云南省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>307</th>\n",
       "      <td>29</td>\n",
       "      <td>177</td>\n",
       "      <td>初中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>甘肃省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(有贷款)</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>28 - 38岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>甘肃省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>346</th>\n",
       "      <td>58</td>\n",
       "      <td>173</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>22 - 33岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>511</th>\n",
       "      <td>19</td>\n",
       "      <td>153</td>\n",
       "      <td>硕士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>山西省</td>\n",
       "      <td>丧偶</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>18 - 18岁</td>\n",
       "      <td>157 - 157cm</td>\n",
       "      <td>中专/职高/技校</td>\n",
       "      <td>不限</td>\n",
       "      <td>山西省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>600</th>\n",
       "      <td>31</td>\n",
       "      <td>166</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>浙江省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    我想要的未来，有房子住，不用多大，最好窗外有阳光...</td>\n",
       "      <td>30 - 40岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>浙江省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>883</th>\n",
       "      <td>34</td>\n",
       "      <td>169</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我是3G百合用户，在百合，我希望自己不是这里的一...</td>\n",
       "      <td>28 - 36岁</td>\n",
       "      <td>169 - 189cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>以后再告诉你,与父母同住,需要时购置,已购房(无贷款),住单位房,住亲朋家</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1079</th>\n",
       "      <td>25</td>\n",
       "      <td>175</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>青海省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>20 - 60岁</td>\n",
       "      <td>150 - 195cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>青海省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1082</th>\n",
       "      <td>67</td>\n",
       "      <td>155</td>\n",
       "      <td>初中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>广东省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>65 - 75岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>2000以下,2000-5000</td>\n",
       "      <td>广东省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1084</th>\n",
       "      <td>28</td>\n",
       "      <td>151</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>海南省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>27 - 37岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>2000以下,2000-5000</td>\n",
       "      <td>海南省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1085</th>\n",
       "      <td>33</td>\n",
       "      <td>152</td>\n",
       "      <td>高中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>31 - 41岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>2000以下,2000-5000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1121</th>\n",
       "      <td>40</td>\n",
       "      <td>190</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>Bayern</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    豆腐干 \\t\\t\\t\\t</td>\n",
       "      <td>20 - 64岁</td>\n",
       "      <td>144 - 196cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000-10000,10000-20000,20000-50000</td>\n",
       "      <td>Bayern</td>\n",
       "      <td>未婚,离异</td>\n",
       "      <td>已购房(无贷款),住单位房</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1126</th>\n",
       "      <td>30</td>\n",
       "      <td>168</td>\n",
       "      <td>硕士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>租房</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>30 - 60岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1137</th>\n",
       "      <td>32</td>\n",
       "      <td>167</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>27 - 38岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1143</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>27 - 38岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1157</th>\n",
       "      <td>30</td>\n",
       "      <td>150</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>24 - 35岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1199</th>\n",
       "      <td>29</td>\n",
       "      <td>162</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 34岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1230</th>\n",
       "      <td>29</td>\n",
       "      <td>173</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 34岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1306</th>\n",
       "      <td>36</td>\n",
       "      <td>163</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>宁夏回族自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我是3G百合用户，在百合，我希望自己不是这里的一...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>163 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>宁夏回族自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>以后再告诉你,与父母同住,需要时购置,已购房(无贷款),住单位房,住亲朋家</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1416</th>\n",
       "      <td>31</td>\n",
       "      <td>152</td>\n",
       "      <td>硕士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>丧偶</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>25 - 36岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1439</th>\n",
       "      <td>29</td>\n",
       "      <td>150</td>\n",
       "      <td>高中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>23 - 34岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1451</th>\n",
       "      <td>30</td>\n",
       "      <td>169</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>24 - 35岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1461</th>\n",
       "      <td>68</td>\n",
       "      <td>172</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>贵州省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>62 - 73岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>贵州省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1511</th>\n",
       "      <td>58</td>\n",
       "      <td>173</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>安徽省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>51 - 62岁</td>\n",
       "      <td>173 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>安徽省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1572</th>\n",
       "      <td>57</td>\n",
       "      <td>167</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>广西壮族自治区</td>\n",
       "      <td>丧偶</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>19 - 30岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>广西壮族自治区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1688</th>\n",
       "      <td>34</td>\n",
       "      <td>176</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>青海省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>31 - 39岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>不限</td>\n",
       "      <td>青海省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1709</th>\n",
       "      <td>32</td>\n",
       "      <td>160</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    你们好：我是张玉u，不知道你看我的资料还觉得合适...</td>\n",
       "      <td>28 - 39岁</td>\n",
       "      <td>160 - 180cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1713</th>\n",
       "      <td>33</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>26 - 37岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1722</th>\n",
       "      <td>28</td>\n",
       "      <td>168</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    每个人心中都有向往的爱情，我的爱情在哪里呢？\\t...</td>\n",
       "      <td>22 - 33岁</td>\n",
       "      <td>168 - 188cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1723</th>\n",
       "      <td>31</td>\n",
       "      <td>167</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>25 - 40岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>北京市</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1744</th>\n",
       "      <td>29</td>\n",
       "      <td>169</td>\n",
       "      <td>高中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我总是爱逞强，不愿说出我有多么爱你，不肯彻底表露...</td>\n",
       "      <td>23 - 34岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1772</th>\n",
       "      <td>31</td>\n",
       "      <td>156</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    幸福是什么呢？不就是两人牵手偕老过一辈子吗？我已...</td>\n",
       "      <td>25 - 36岁</td>\n",
       "      <td>156 - 176cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1790</th>\n",
       "      <td>32</td>\n",
       "      <td>177</td>\n",
       "      <td>初中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    躲在某一时间，想念一段时光的掌纹；躲在某一地点，...</td>\n",
       "      <td>25 - 36岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1791</th>\n",
       "      <td>31</td>\n",
       "      <td>178</td>\n",
       "      <td>博士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    一个人的天空很蓝，蓝的有点忧郁。一个人的日子自由...</td>\n",
       "      <td>24 - 35岁</td>\n",
       "      <td>160 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1854</th>\n",
       "      <td>33</td>\n",
       "      <td>175</td>\n",
       "      <td>硕士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>安徽省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>与父母同住</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>32 - 42岁</td>\n",
       "      <td>170 - 190cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>安徽省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2341</th>\n",
       "      <td>48</td>\n",
       "      <td>158</td>\n",
       "      <td>硕士</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>离异</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    希望找到一个真诚，自信，积极，阳光的另一半，相扶...</td>\n",
       "      <td>42 - 52岁</td>\n",
       "      <td>175 - 211cm</td>\n",
       "      <td>本科,硕士,博士</td>\n",
       "      <td>20000-50000</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2349</th>\n",
       "      <td>61</td>\n",
       "      <td>159</td>\n",
       "      <td>高中</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>黑龙江省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    照片都是近照。想互相了解的加我了解，不是会员不方...</td>\n",
       "      <td>52 - 60岁</td>\n",
       "      <td>175 - 211cm</td>\n",
       "      <td>初中,中专/职高/技校,高中,大专,本科,硕士,博士</td>\n",
       "      <td>2000以下,2000-5000,5000-10000,10000-20000,20000-...</td>\n",
       "      <td>黑龙江省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2500</th>\n",
       "      <td>62</td>\n",
       "      <td>163</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>湖北省</td>\n",
       "      <td>丧偶</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    我喜欢音乐，喜欢唱歌，喜欢摄影，喜欢旅游。曾经是...</td>\n",
       "      <td>56 - 63岁</td>\n",
       "      <td>174 - 180cm</td>\n",
       "      <td>大专,本科,硕士,博士</td>\n",
       "      <td>2000-5000,5000-10000,10000-20000,20000-50000,&gt;...</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>不限</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2550</th>\n",
       "      <td>30</td>\n",
       "      <td>167</td>\n",
       "      <td>大专</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>天津市</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    幸福是什么呢？不就是两人牵手偕老过一辈子吗？我已...</td>\n",
       "      <td>25 - 36岁</td>\n",
       "      <td>167 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>福建省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2712</th>\n",
       "      <td>39</td>\n",
       "      <td>167</td>\n",
       "      <td>本科</td>\n",
       "      <td>&gt;50000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    大家好，我来自台湾省台北市，在百合网真心寻找一个...</td>\n",
       "      <td>33 - 44岁</td>\n",
       "      <td>167 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      年龄   身高  学历      工资       家乡  婚姻        住房  \\\n",
       "61    51  163  博士  >50000      江苏省  未婚     需要时购置   \n",
       "154   54  172  博士  >50000      吉林省  未婚      住亲朋家   \n",
       "177   33  175  博士  >50000    以后告诉你  未婚     以后告诉你   \n",
       "181   72  169  博士  >50000      江西省  离异     以后告诉你   \n",
       "207   58  180  初中  >50000      江苏省  未婚  已购房(有贷款)   \n",
       "255   31  162  高中  >50000      云南省  未婚  已购房(无贷款)   \n",
       "307   29  177  初中  >50000      甘肃省  未婚  已购房(有贷款)   \n",
       "346   58  173  大专  >50000    西藏自治区  未婚     以后告诉你   \n",
       "511   19  153  硕士  >50000      山西省  丧偶     以后告诉你   \n",
       "600   31  166  博士  >50000      浙江省  未婚  已购房(无贷款)   \n",
       "883   34  169  本科  >50000      台湾省  未婚    以后再告诉你   \n",
       "1079  25  175  博士  >50000      青海省  未婚     以后告诉你   \n",
       "1082  67  155  初中  >50000      广东省  未婚     以后告诉你   \n",
       "1084  28  151  本科  >50000      海南省  未婚     以后告诉你   \n",
       "1085  33  152  高中  >50000    西藏自治区  未婚     以后告诉你   \n",
       "1121  40  190  博士  >50000   Bayern  未婚  已购房(无贷款)   \n",
       "1126  30  168  硕士  >50000    以后告诉你  未婚        租房   \n",
       "1137  32  167  博士  >50000    以后告诉你  未婚     以后告诉你   \n",
       "1143  31  161  博士  >50000    以后告诉你  未婚     以后告诉你   \n",
       "1157  30  150  大专  >50000    以后告诉你  未婚     以后告诉你   \n",
       "1199  29  162  本科  >50000    以后告诉你  未婚     以后告诉你   \n",
       "1230  29  173  博士  >50000    以后告诉你  未婚     以后告诉你   \n",
       "1306  36  163  博士  >50000  宁夏回族自治区  未婚    以后再告诉你   \n",
       "1416  31  152  硕士  >50000    西藏自治区  丧偶     以后告诉你   \n",
       "1439  29  150  高中  >50000    西藏自治区  未婚     以后告诉你   \n",
       "1451  30  169  大专  >50000    西藏自治区  未婚     以后告诉你   \n",
       "1461  68  172  博士  >50000      贵州省  未婚     以后告诉你   \n",
       "1511  58  173  本科  >50000      安徽省  离异     以后告诉你   \n",
       "1572  57  167  大专  >50000  广西壮族自治区  丧偶     以后告诉你   \n",
       "1688  34  176  本科  >50000      青海省  未婚     以后告诉你   \n",
       "1709  32  160  本科  >50000  香港特别行政区  未婚     以后告诉你   \n",
       "1713  33  160  大专  >50000  香港特别行政区  未婚    以后再告诉你   \n",
       "1722  28  168  本科  >50000  香港特别行政区  未婚  已购房(无贷款)   \n",
       "1723  31  167  本科  >50000  香港特别行政区  未婚     以后告诉你   \n",
       "1744  29  169  高中  >50000  澳门特别行政区  未婚     以后告诉你   \n",
       "1772  31  156  本科  >50000      台湾省  未婚     以后告诉你   \n",
       "1790  32  177  初中  >50000      台湾省  未婚     以后告诉你   \n",
       "1791  31  178  博士  >50000      台湾省  未婚     以后告诉你   \n",
       "1854  33  175  硕士  >50000      安徽省  未婚     与父母同住   \n",
       "2341  48  158  硕士  >50000      江苏省  离异  已购房(无贷款)   \n",
       "2349  61  159  高中  >50000     黑龙江省  离异     以后告诉你   \n",
       "2500  62  163  大专  >50000      湖北省  丧偶  已购房(无贷款)   \n",
       "2550  30  167  大专  >50000      天津市  未婚  已购房(无贷款)   \n",
       "2712  39  167  本科  >50000      台湾省  离异     以后告诉你   \n",
       "\n",
       "                                                   自我介绍      对象年龄  \\\n",
       "61    \\n                    岁月在流逝，只想找个和我相携一生的伴侣，那是最美...  50 - 60岁   \n",
       "154   \\n                    也许我很平凡，但是我绝不缺乏生活的热情和生命的梦...  53 - 63岁   \n",
       "177   \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  27 - 38岁   \n",
       "181   \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  21 - 32岁   \n",
       "207   \\n                    真想把“爱情“抓到面前来，揪住它的衣领恶狠狠的对...  58 - 68岁   \n",
       "255   \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  30 - 40岁   \n",
       "307   \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  28 - 38岁   \n",
       "346   \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  22 - 33岁   \n",
       "511   \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  18 - 18岁   \n",
       "600   \\n                    我想要的未来，有房子住，不用多大，最好窗外有阳光...  30 - 40岁   \n",
       "883   \\n                    我是3G百合用户，在百合，我希望自己不是这里的一...  28 - 36岁   \n",
       "1079  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  20 - 60岁   \n",
       "1082  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  65 - 75岁   \n",
       "1084  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  27 - 37岁   \n",
       "1085  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  31 - 41岁   \n",
       "1121                 \\n                    豆腐干 \\t\\t\\t\\t  20 - 64岁   \n",
       "1126  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  30 - 60岁   \n",
       "1137  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  27 - 38岁   \n",
       "1143  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  27 - 38岁   \n",
       "1157  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  24 - 35岁   \n",
       "1199  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 34岁   \n",
       "1230  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 34岁   \n",
       "1306  \\n                    我是3G百合用户，在百合，我希望自己不是这里的一...  30 - 38岁   \n",
       "1416  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  25 - 36岁   \n",
       "1439  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  23 - 34岁   \n",
       "1451  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  24 - 35岁   \n",
       "1461  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  62 - 73岁   \n",
       "1511  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  51 - 62岁   \n",
       "1572  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  19 - 30岁   \n",
       "1688  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  31 - 39岁   \n",
       "1709  \\n                    你们好：我是张玉u，不知道你看我的资料还觉得合适...  28 - 39岁   \n",
       "1713  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  26 - 37岁   \n",
       "1722  \\n                    每个人心中都有向往的爱情，我的爱情在哪里呢？\\t...  22 - 33岁   \n",
       "1723  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  25 - 40岁   \n",
       "1744  \\n                    我总是爱逞强，不愿说出我有多么爱你，不肯彻底表露...  23 - 34岁   \n",
       "1772  \\n                    幸福是什么呢？不就是两人牵手偕老过一辈子吗？我已...  25 - 36岁   \n",
       "1790  \\n                    躲在某一时间，想念一段时光的掌纹；躲在某一地点，...  25 - 36岁   \n",
       "1791  \\n                    一个人的天空很蓝，蓝的有点忧郁。一个人的日子自由...  24 - 35岁   \n",
       "1854  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  32 - 42岁   \n",
       "2341  \\n                    希望找到一个真诚，自信，积极，阳光的另一半，相扶...  42 - 52岁   \n",
       "2349  \\n                    照片都是近照。想互相了解的加我了解，不是会员不方...  52 - 60岁   \n",
       "2500  \\n                    我喜欢音乐，喜欢唱歌，喜欢摄影，喜欢旅游。曾经是...  56 - 63岁   \n",
       "2550  \\n                    幸福是什么呢？不就是两人牵手偕老过一辈子吗？我已...  25 - 36岁   \n",
       "2712  \\n                    大家好，我来自台湾省台北市，在百合网真心寻找一个...  33 - 44岁   \n",
       "\n",
       "             对象身高                        对象学历  \\\n",
       "61    170 - 190cm                          不限   \n",
       "154   170 - 190cm                          不限   \n",
       "177   160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "181   160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "207   170 - 190cm                          不限   \n",
       "255   170 - 190cm                          不限   \n",
       "307   170 - 190cm                          不限   \n",
       "346   160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "511   157 - 157cm                    中专/职高/技校   \n",
       "600   170 - 190cm                          不限   \n",
       "883   169 - 189cm                          不限   \n",
       "1079  150 - 195cm                          不限   \n",
       "1082  170 - 190cm                          不限   \n",
       "1084  170 - 190cm                          不限   \n",
       "1085  170 - 190cm                          不限   \n",
       "1121  144 - 196cm                          不限   \n",
       "1126  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1137  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1143  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1157  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1199  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1230  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1306  163 - 183cm                          不限   \n",
       "1416  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1439  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1451  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1461  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1511  173 - 193cm                          不限   \n",
       "1572  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1688  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1709  160 - 180cm                          不限   \n",
       "1713  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1722  168 - 188cm                          不限   \n",
       "1723  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1744  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1772  156 - 176cm                          不限   \n",
       "1790  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1791  160 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "1854  170 - 190cm                          不限   \n",
       "2341  175 - 211cm                    本科,硕士,博士   \n",
       "2349  175 - 211cm  初中,中专/职高/技校,高中,大专,本科,硕士,博士   \n",
       "2500  174 - 180cm                 大专,本科,硕士,博士   \n",
       "2550  167 - 187cm                          不限   \n",
       "2712  167 - 187cm                          不限   \n",
       "\n",
       "                                                   对象薪水     对象家乡   对象婚姻  \\\n",
       "61                                                   不限      江苏省     不限   \n",
       "154                                                  不限      吉林省     不限   \n",
       "177   2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "181   2000以下,2000-5000,5000-10000,10000-20000,20000-...      江西省     不限   \n",
       "207                                                  不限      江苏省     不限   \n",
       "255                                                  不限      云南省     不限   \n",
       "307                                                  不限      甘肃省     不限   \n",
       "346   2000以下,2000-5000,5000-10000,10000-20000,20000-...    西藏自治区     不限   \n",
       "511                                                  不限      山西省     未婚   \n",
       "600                                                  不限      浙江省     不限   \n",
       "883   2000以下,2000-5000,5000-10000,10000-20000,20000-...      台湾省     不限   \n",
       "1079                                                 不限      青海省     不限   \n",
       "1082                                   2000以下,2000-5000      广东省     未婚   \n",
       "1084                                   2000以下,2000-5000      海南省     未婚   \n",
       "1085                                   2000以下,2000-5000    西藏自治区     未婚   \n",
       "1121                 5000-10000,10000-20000,20000-50000   Bayern  未婚,离异   \n",
       "1126  2000以下,2000-5000,5000-10000,10000-20000,20000-...      江苏省     不限   \n",
       "1137  2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "1143  2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "1157  2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "1199  2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "1230  2000以下,2000-5000,5000-10000,10000-20000,20000-...       不限     不限   \n",
       "1306  2000以下,2000-5000,5000-10000,10000-20000,20000-...  宁夏回族自治区     不限   \n",
       "1416  2000以下,2000-5000,5000-10000,10000-20000,20000-...    西藏自治区     不限   \n",
       "1439  2000以下,2000-5000,5000-10000,10000-20000,20000-...    西藏自治区     不限   \n",
       "1451  2000以下,2000-5000,5000-10000,10000-20000,20000-...    西藏自治区     不限   \n",
       "1461  2000以下,2000-5000,5000-10000,10000-20000,20000-...      贵州省     不限   \n",
       "1511                                                 不限      安徽省     不限   \n",
       "1572  2000以下,2000-5000,5000-10000,10000-20000,20000-...  广西壮族自治区     不限   \n",
       "1688                                                 不限      青海省     不限   \n",
       "1709                                                 不限  香港特别行政区     不限   \n",
       "1713  2000以下,2000-5000,5000-10000,10000-20000,20000-...  香港特别行政区     不限   \n",
       "1722                                                 不限  香港特别行政区     不限   \n",
       "1723  2000以下,2000-5000,5000-10000,10000-20000,20000-...      北京市     不限   \n",
       "1744  2000以下,2000-5000,5000-10000,10000-20000,20000-...  澳门特别行政区     不限   \n",
       "1772                                                 不限      台湾省     不限   \n",
       "1790  2000以下,2000-5000,5000-10000,10000-20000,20000-...      台湾省     不限   \n",
       "1791  2000以下,2000-5000,5000-10000,10000-20000,20000-...      台湾省     不限   \n",
       "1854                                                 不限      安徽省     不限   \n",
       "2341                                        20000-50000      江苏省     不限   \n",
       "2349  2000以下,2000-5000,5000-10000,10000-20000,20000-...     黑龙江省     不限   \n",
       "2500  2000-5000,5000-10000,10000-20000,20000-50000,>...      江苏省     不限   \n",
       "2550                                                 不限      福建省     不限   \n",
       "2712                                                 不限      台湾省     不限   \n",
       "\n",
       "                                             对象住房   最低工资   最高工资  \n",
       "61                                             不限  50000  50000  \n",
       "154                                            不限  50000  50000  \n",
       "177                                            不限  50000  50000  \n",
       "181                                            不限  50000  50000  \n",
       "207                                            不限  50000  50000  \n",
       "255                                            不限  50000  50000  \n",
       "307                                            不限  50000  50000  \n",
       "346                                            不限  50000  50000  \n",
       "511                                        以后再告诉你  50000  50000  \n",
       "600                                            不限  50000  50000  \n",
       "883         以后再告诉你,与父母同住,需要时购置,已购房(无贷款),住单位房,住亲朋家  50000  50000  \n",
       "1079                                           不限  50000  50000  \n",
       "1082  以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家  50000  50000  \n",
       "1084  以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家  50000  50000  \n",
       "1085  以后再告诉你,与父母同住,租房,已购房(有贷款),已购房(无贷款),住单位房,住亲朋家  50000  50000  \n",
       "1121                                已购房(无贷款),住单位房  50000  50000  \n",
       "1126                                           不限  50000  50000  \n",
       "1137                                           不限  50000  50000  \n",
       "1143                                           不限  50000  50000  \n",
       "1157                                           不限  50000  50000  \n",
       "1199                                           不限  50000  50000  \n",
       "1230                                           不限  50000  50000  \n",
       "1306        以后再告诉你,与父母同住,需要时购置,已购房(无贷款),住单位房,住亲朋家  50000  50000  \n",
       "1416                                           不限  50000  50000  \n",
       "1439                                           不限  50000  50000  \n",
       "1451                                           不限  50000  50000  \n",
       "1461                                           不限  50000  50000  \n",
       "1511                                           不限  50000  50000  \n",
       "1572                                           不限  50000  50000  \n",
       "1688                                           不限  50000  50000  \n",
       "1709                                           不限  50000  50000  \n",
       "1713                                           不限  50000  50000  \n",
       "1722                                           不限  50000  50000  \n",
       "1723                                           不限  50000  50000  \n",
       "1744                                           不限  50000  50000  \n",
       "1772                                           不限  50000  50000  \n",
       "1790                                           不限  50000  50000  \n",
       "1791                                           不限  50000  50000  \n",
       "1854                                           不限  50000  50000  \n",
       "2341                                           不限  50000  50000  \n",
       "2349                                           不限  50000  50000  \n",
       "2500                                     已购房(无贷款)  50000  50000  \n",
       "2550                                           不限  50000  50000  \n",
       "2712                                           不限  50000  50000  "
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indexs = df[df['最低工资'] == '>50000'].index\n",
    "df.loc[indexs, '最低工资'] = '50000'\n",
    "df.loc[indexs, '最高工资'] = '50000'\n",
    "df.loc[indexs]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **转化为数字类型**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2827 entries, 0 to 2874\n",
      "Data columns (total 17 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   年龄      2827 non-null   object \n",
      " 1   身高      2827 non-null   object \n",
      " 2   学历      2827 non-null   object \n",
      " 3   工资      2827 non-null   object \n",
      " 4   家乡      2827 non-null   object \n",
      " 5   婚姻      2827 non-null   object \n",
      " 6   住房      2827 non-null   object \n",
      " 7   自我介绍    2827 non-null   object \n",
      " 8   对象年龄    2827 non-null   object \n",
      " 9   对象身高    2827 non-null   object \n",
      " 10  对象学历    2827 non-null   object \n",
      " 11  对象薪水    2827 non-null   object \n",
      " 12  对象家乡    2827 non-null   object \n",
      " 13  对象婚姻    2827 non-null   object \n",
      " 14  对象住房    2827 non-null   object \n",
      " 15  最低工资    2827 non-null   int64  \n",
      " 16  最高工资    2827 non-null   float64\n",
      "dtypes: float64(1), int64(1), object(15)\n",
      "memory usage: 477.5+ KB\n"
     ]
    }
   ],
   "source": [
    "# Cast both salary bounds from text to numbers, then inspect the resulting dtypes.\n",
    "for bound_column in ('最高工资', '最低工资'):\n",
    "    df[bound_column] = pd.to_numeric(df[bound_column])\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **求工资的平均值**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row-wise mean of the lower and upper salary bounds.\n",
    "salary_bounds = df[['最低工资', '最高工资']]\n",
    "df['平均工资'] = salary_bounds.mean(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38岁</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31岁</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29岁</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍      对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38岁  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29岁  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31岁  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29岁  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房   最低工资     最高工资     平均工资  \n",
       "0   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0  \n",
       "1   不限   不限  澳门特别行政区   不限   不限   5000  10000.0   7500.0  \n",
       "2   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0  \n",
       "3   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0  \n",
       "4   不限   不限  香港特别行政区   不限   不限   5000  10000.0   7500.0  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 把对象年龄的“岁”去掉，其他处理方法同上"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['对象年龄'] = df['对象年龄'].str[0:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>对象学历</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193cm   \n",
       "\n",
       "  对象学历 对象薪水     对象家乡 对象婚姻 对象住房   最低工资     最高工资     平均工资 对象最低年龄 对象最高年龄  \n",
       "0   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0     21     29  \n",
       "1   不限   不限  澳门特别行政区   不限   不限   5000  10000.0   7500.0     30     38  \n",
       "2   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0     21     29  \n",
       "3   不限   不限      台湾省   不限   不限  10000  20000.0  15000.0     23     31  \n",
       "4   不限   不限  香港特别行政区   不限   不限   5000  10000.0   7500.0     21     29  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对对象年龄进行处理\n",
    "def get_he_age_max_min(he_age):\n",
    "    try:\n",
    "        result = re.split(' - ', he_age)\n",
    "        return result\n",
    "    except:\n",
    "        return he_age\n",
    "he_age = df['对象年龄'].apply(get_he_age_max_min)\n",
    "df['对象最低年龄'] = he_age.str[0]\n",
    "df['对象最高年龄'] = he_age.str[1]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2827 entries, 0 to 2874\n",
      "Data columns (total 20 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   年龄      2827 non-null   object \n",
      " 1   身高      2827 non-null   object \n",
      " 2   学历      2827 non-null   object \n",
      " 3   工资      2827 non-null   object \n",
      " 4   家乡      2827 non-null   object \n",
      " 5   婚姻      2827 non-null   object \n",
      " 6   住房      2827 non-null   object \n",
      " 7   自我介绍    2827 non-null   object \n",
      " 8   对象年龄    2827 non-null   object \n",
      " 9   对象身高    2827 non-null   object \n",
      " 10  对象学历    2827 non-null   object \n",
      " 11  对象薪水    2827 non-null   object \n",
      " 12  对象家乡    2827 non-null   object \n",
      " 13  对象婚姻    2827 non-null   object \n",
      " 14  对象住房    2827 non-null   object \n",
      " 15  最低工资    2827 non-null   int64  \n",
      " 16  最高工资    2827 non-null   float64\n",
      " 17  平均工资    2827 non-null   float64\n",
      " 18  对象最低年龄  2827 non-null   int64  \n",
      " 19  对象最高年龄  2827 non-null   int64  \n",
      "dtypes: float64(2), int64(3), object(15)\n",
      "memory usage: 543.8+ KB\n"
     ]
    }
   ],
   "source": [
    "df['对象最低年龄'] = pd.to_numeric(df['对象最低年龄'])\n",
    "df['对象最高年龄'] = pd.to_numeric(df['对象最高年龄'])\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['对象平均年龄'] = df[['对象最低年龄', '对象最高年龄']].mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>对象薪水</th>\n",
       "      <th>对象家乡</th>\n",
       "      <th>对象婚姻</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187cm</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186cm</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183cm</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185cm</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193cm</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>不限</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄         对象身高  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187cm   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186cm   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183cm   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185cm   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193cm   \n",
       "\n",
       "   ... 对象薪水     对象家乡 对象婚姻 对象住房   最低工资     最高工资     平均工资  对象最低年龄  对象最高年龄  \\\n",
       "0  ...   不限      台湾省   不限   不限  10000  20000.0  15000.0      21      29   \n",
       "1  ...   不限  澳门特别行政区   不限   不限   5000  10000.0   7500.0      30      38   \n",
       "2  ...   不限      台湾省   不限   不限  10000  20000.0  15000.0      21      29   \n",
       "3  ...   不限      台湾省   不限   不限  10000  20000.0  15000.0      23      31   \n",
       "4  ...   不限  香港特别行政区   不限   不限   5000  10000.0   7500.0      21      29   \n",
       "\n",
       "   对象平均年龄  \n",
       "0    25.0  \n",
       "1    34.0  \n",
       "2    25.0  \n",
       "3    27.0  \n",
       "4    25.0  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0               不限\n",
       "1               不限\n",
       "2               不限\n",
       "3               不限\n",
       "4               不限\n",
       "           ...    \n",
       "2870            不限\n",
       "2871            不限\n",
       "2872            不限\n",
       "2873    5000-10000\n",
       "2874            不限\n",
       "Name: 对象薪水, Length: 2827, dtype: object"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['对象薪水']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理对象身高"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['对象身高'] = df['对象身高'].str[0:9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "  对象住房   最低工资     最高工资     平均工资 对象最低年龄  对象最高年龄  对象平均年龄  对象最低身高  对象最高身高  对象平均身高  \n",
       "0   不限  10000  20000.0  15000.0     21      29    25.0     162     187     NaN  \n",
       "1   不限   5000  10000.0   7500.0     30      38    34.0     161     186     NaN  \n",
       "2   不限  10000  20000.0  15000.0     21      29    25.0     158     183     NaN  \n",
       "3   不限  10000  20000.0  15000.0     23      31    27.0     160     185     NaN  \n",
       "4   不限   5000  10000.0   7500.0     21      29    25.0     168     193     NaN  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['对象身高'] \n",
    "\n",
    "# 这里也可以调用算年龄的那个函数，我这里直接调用了，名字没取好，不管了\n",
    "he_height = df['对象身高'].apply(get_he_age_max_min)\n",
    "df['对象最低身高'] = he_height.str[0]\n",
    "df['对象最高身高'] = he_height.str[1]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "  对象住房   最低工资     最高工资     平均工资 对象最低年龄  对象最高年龄  对象平均年龄  对象最低身高  对象最高身高  对象平均身高  \n",
       "0   不限  10000  20000.0  15000.0     21      29    25.0     162     187     NaN  \n",
       "1   不限   5000  10000.0   7500.0     30      38    34.0     161     186     NaN  \n",
       "2   不限  10000  20000.0  15000.0     21      29    25.0     158     183     NaN  \n",
       "3   不限  10000  20000.0  15000.0     23      31    27.0     160     185     NaN  \n",
       "4   不限   5000  10000.0   7500.0     21      29    25.0     168     193     NaN  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indexs = df[df['对象最低身高'] == '不限'].index\n",
    "df.loc[indexs, '对象最低身高'] = '0'\n",
    "df.loc[indexs, '对象最高身高'] = '0'\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2827 entries, 0 to 2874\n",
      "Data columns (total 24 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   年龄      2827 non-null   object \n",
      " 1   身高      2827 non-null   object \n",
      " 2   学历      2827 non-null   object \n",
      " 3   工资      2827 non-null   object \n",
      " 4   家乡      2827 non-null   object \n",
      " 5   婚姻      2827 non-null   object \n",
      " 6   住房      2827 non-null   object \n",
      " 7   自我介绍    2827 non-null   object \n",
      " 8   对象年龄    2827 non-null   object \n",
      " 9   对象身高    2827 non-null   object \n",
      " 10  对象学历    2827 non-null   object \n",
      " 11  对象薪水    2827 non-null   object \n",
      " 12  对象家乡    2827 non-null   object \n",
      " 13  对象婚姻    2827 non-null   object \n",
      " 14  对象住房    2827 non-null   object \n",
      " 15  最低工资    2827 non-null   int64  \n",
      " 16  最高工资    2827 non-null   float64\n",
      " 17  平均工资    2827 non-null   float64\n",
      " 18  对象最低年龄  2827 non-null   int64  \n",
      " 19  对象最高年龄  2827 non-null   int64  \n",
      " 20  对象平均年龄  2827 non-null   float64\n",
      " 21  对象最低身高  2827 non-null   int64  \n",
      " 22  对象最高身高  2827 non-null   int64  \n",
      " 23  对象平均身高  0 non-null      float64\n",
      "dtypes: float64(4), int64(5), object(15)\n",
      "memory usage: 632.1+ KB\n"
     ]
    }
   ],
   "source": [
    "\n",
    "df['对象最低身高'] = pd.to_numeric(df['对象最低身高'])\n",
    "df['对象最高身高'] = pd.to_numeric(df['对象最高身高'])\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['对象平均身高'] = df[['对象最低身高', '对象最高身高']].mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>174.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>173.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>170.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>172.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>180.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "  对象住房   最低工资     最高工资     平均工资 对象最低年龄  对象最高年龄  对象平均年龄  对象最低身高  对象最高身高  对象平均身高  \n",
       "0   不限  10000  20000.0  15000.0     21      29    25.0     162     187   174.5  \n",
       "1   不限   5000  10000.0   7500.0     30      38    34.0     161     186   173.5  \n",
       "2   不限  10000  20000.0  15000.0     21      29    25.0     158     183   170.5  \n",
       "3   不限  10000  20000.0  15000.0     23      31    27.0     160     185   172.5  \n",
       "4   不限   5000  10000.0   7500.0     21      29    25.0     168     193   180.5  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['对象平均身高']\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "178.13371064732934"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 看看我的身高够不够平均值哈哈哈哈\n",
    "df['对象平均身高'].mean(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 我去 我输了 呜呜呜"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理对象薪水"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>对象住房</th>\n",
       "      <th>最低工资</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>174.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>173.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>170.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>172.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>不限</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>180.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "  对象住房   最低工资     最高工资     平均工资 对象最低年龄  对象最高年龄  对象平均年龄  对象最低身高  对象最高身高  对象平均身高  \n",
       "0   不限  10000  20000.0  15000.0     21      29    25.0     162     187   174.5  \n",
       "1   不限   5000  10000.0   7500.0     30      38    34.0     161     186   173.5  \n",
       "2   不限  10000  20000.0  15000.0     21      29    25.0     158     183   170.5  \n",
       "3   不限  10000  20000.0  15000.0     23      31    27.0     160     185   172.5  \n",
       "4   不限   5000  10000.0   7500.0     21      29    25.0     168     193   180.5  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 再处理下对象的工资吧\n",
    "# 把不限的改成 '0-0'待会就好处理了\n",
    "indexs = df[df['对象薪水'] == '不限'].index\n",
    "df.loc[indexs, '对象薪水'] = '0-0'\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0              0-0\n",
       "1              0-0\n",
       "2              0-0\n",
       "3              0-0\n",
       "4              0-0\n",
       "           ...    \n",
       "2870           0-0\n",
       "2871           0-0\n",
       "2872           0-0\n",
       "2873    5000-10000\n",
       "2874           0-0\n",
       "Name: 对象薪水, Length: 2827, dtype: object"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['对象薪水']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>最高工资</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "      <th>对象最低薪水</th>\n",
       "      <th>对象最高薪水</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>174.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>173.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>170.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>20000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>172.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>180.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "      最高工资     平均工资 对象最低年龄 对象最高年龄 对象平均年龄  对象最低身高  对象最高身高  对象平均身高  对象最低薪水  \\\n",
       "0  20000.0  15000.0     21     29   25.0     162     187   174.5       0   \n",
       "1  10000.0   7500.0     30     38   34.0     161     186   173.5       0   \n",
       "2  20000.0  15000.0     21     29   25.0     158     183   170.5       0   \n",
       "3  20000.0  15000.0     23     31   27.0     160     185   172.5       0   \n",
       "4  10000.0   7500.0     21     29   25.0     168     193   180.5       0   \n",
       "\n",
       "   对象最高薪水  \n",
       "0       0  \n",
       "1       0  \n",
       "2       0  \n",
       "3       0  \n",
       "4       0  \n",
       "\n",
       "[5 rows x 26 columns]"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "salary = df['对象薪水'].apply(get_salary_max_min)\n",
    "df['对象最低薪水'] = salary.str[0]\n",
    "df['对象最高薪水'] = salary.str[1]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 把含有中文和特殊字符的处理了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# 发现错误 ， 有‘>50000’ 的字符串数据， 改成50000，方便计算\n",
    "indexs = df[df['对象最低薪水'] == '>50000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '50000'\n",
    "df.loc[indexs, '对象最高薪水'] = '50000'\n",
    "\n",
    "indexs = df[df['对象最高薪水'] == '50000,>50000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '50000'\n",
    "df.loc[indexs, '对象最高薪水'] = '50000'\n",
    "\n",
    "indexs = df[df['对象最高薪水'] == '10000,10000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '10000'\n",
    "df.loc[indexs, '对象最高薪水'] = '10000'\n",
    "\n",
    "indexs = df[df['对象最高薪水'] == '20000,20000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '20000'\n",
    "df.loc[indexs, '对象最高薪水'] = '20000'\n",
    "\n",
    "indexs = df[df['对象最高薪水'] == '5000,5000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '5000'\n",
    "df.loc[indexs, '对象最高薪水'] = '5000'\n",
    "\n",
    "indexs = df[df['对象最高薪水'] == '5000,10000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '5000'\n",
    "df.loc[indexs, '对象最高薪水'] = '10000'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 发现错误，有字符串数据， 把这个也改成0好了\n",
    "indexs = df[df['对象最低薪水'] == '2000以下,2000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '0'\n",
    "df.loc[indexs, '对象最高薪水'] = '0'\n",
    "indexs = df[df['对象最低薪水'] == '2000以下,5000'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '0'\n",
    "df.loc[indexs, '对象最高薪水'] = '0'\n",
    "indexs = df[df['对象最低薪水'] == '2000以下'].index\n",
    "df.loc[indexs, '对象最低薪水'] = '0'\n",
    "df.loc[indexs, '对象最高薪水'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 2827 entries, 0 to 2874\n",
      "Data columns (total 26 columns):\n",
      " #   Column  Non-Null Count  Dtype  \n",
      "---  ------  --------------  -----  \n",
      " 0   年龄      2827 non-null   object \n",
      " 1   身高      2827 non-null   object \n",
      " 2   学历      2827 non-null   object \n",
      " 3   工资      2827 non-null   object \n",
      " 4   家乡      2827 non-null   object \n",
      " 5   婚姻      2827 non-null   object \n",
      " 6   住房      2827 non-null   object \n",
      " 7   自我介绍    2827 non-null   object \n",
      " 8   对象年龄    2827 non-null   object \n",
      " 9   对象身高    2827 non-null   object \n",
      " 10  对象学历    2827 non-null   object \n",
      " 11  对象薪水    2827 non-null   object \n",
      " 12  对象家乡    2827 non-null   object \n",
      " 13  对象婚姻    2827 non-null   object \n",
      " 14  对象住房    2827 non-null   object \n",
      " 15  最低工资    2827 non-null   int64  \n",
      " 16  最高工资    2827 non-null   float64\n",
      " 17  平均工资    2827 non-null   float64\n",
      " 18  对象最低年龄  2827 non-null   int64  \n",
      " 19  对象最高年龄  2827 non-null   int64  \n",
      " 20  对象平均年龄  2827 non-null   float64\n",
      " 21  对象最低身高  2827 non-null   int64  \n",
      " 22  对象最高身高  2827 non-null   int64  \n",
      " 23  对象平均身高  2827 non-null   float64\n",
      " 24  对象最低薪水  2827 non-null   int64  \n",
      " 25  对象最高薪水  2827 non-null   int64  \n",
      "dtypes: float64(4), int64(7), object(15)\n",
      "memory usage: 676.3+ KB\n"
     ]
    }
   ],
   "source": [
    "# Convert both partner-salary bound columns to numeric dtype, then inspect the frame.\n",
    "for salary_col in ('对象最低薪水', '对象最高薪水'):\n",
    "    df[salary_col] = pd.to_numeric(df[salary_col])\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "      <th>对象最低薪水</th>\n",
       "      <th>对象最高薪水</th>\n",
       "      <th>对象平均薪水</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>162</td>\n",
       "      <td>高中</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>162 - 187</td>\n",
       "      <td>...</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>162</td>\n",
       "      <td>187</td>\n",
       "      <td>174.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>31</td>\n",
       "      <td>161</td>\n",
       "      <td>大专</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>澳门特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>30 - 38</td>\n",
       "      <td>161 - 186</td>\n",
       "      <td>...</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>30</td>\n",
       "      <td>38</td>\n",
       "      <td>34.0</td>\n",
       "      <td>161</td>\n",
       "      <td>186</td>\n",
       "      <td>173.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23</td>\n",
       "      <td>158</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>158 - 183</td>\n",
       "      <td>...</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>158</td>\n",
       "      <td>183</td>\n",
       "      <td>170.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>台湾省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...</td>\n",
       "      <td>23 - 31</td>\n",
       "      <td>160 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>23</td>\n",
       "      <td>31</td>\n",
       "      <td>27.0</td>\n",
       "      <td>160</td>\n",
       "      <td>185</td>\n",
       "      <td>172.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>22</td>\n",
       "      <td>168</td>\n",
       "      <td>高中</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>香港特别行政区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>21 - 29</td>\n",
       "      <td>168 - 193</td>\n",
       "      <td>...</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>21</td>\n",
       "      <td>29</td>\n",
       "      <td>25.0</td>\n",
       "      <td>168</td>\n",
       "      <td>193</td>\n",
       "      <td>180.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   年龄   身高  学历           工资       家乡  婚姻      住房  \\\n",
       "0  22  162  高中  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "1  31  161  大专   5000-10000  澳门特别行政区  未婚  以后再告诉你   \n",
       "2  23  158  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "3  24  160  大专  10000-20000      台湾省  未婚  以后再告诉你   \n",
       "4  22  168  高中   5000-10000  香港特别行政区  未婚  以后再告诉你   \n",
       "\n",
       "                                                自我介绍     对象年龄       对象身高  ...  \\\n",
       "0  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  162 - 187  ...   \n",
       "1  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  30 - 38  161 - 186  ...   \n",
       "2  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  21 - 29  158 - 183  ...   \n",
       "3  \\n                    大家好，很高兴来到这里，非常希望认识更多的朋友，...  23 - 31  160 - 185  ...   \n",
       "4  \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  21 - 29  168 - 193  ...   \n",
       "\n",
       "      平均工资 对象最低年龄 对象最高年龄 对象平均年龄 对象最低身高  对象最高身高  对象平均身高  对象最低薪水  对象最高薪水  对象平均薪水  \n",
       "0  15000.0     21     29   25.0    162     187   174.5       0       0     0.0  \n",
       "1   7500.0     30     38   34.0    161     186   173.5       0       0     0.0  \n",
       "2  15000.0     21     29   25.0    158     183   170.5       0       0     0.0  \n",
       "3  15000.0     23     31   27.0    160     185   172.5       0       0     0.0  \n",
       "4   7500.0     21     29   25.0    168     193   180.5       0       0     0.0  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row-wise mean of the minimum and maximum partner salary.\n",
    "salary_bounds = df[['对象最低薪水', '对象最高薪水']]\n",
    "df['对象平均薪水'] = salary_bounds.mean(axis='columns')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>身高</th>\n",
       "      <th>学历</th>\n",
       "      <th>工资</th>\n",
       "      <th>家乡</th>\n",
       "      <th>婚姻</th>\n",
       "      <th>住房</th>\n",
       "      <th>自我介绍</th>\n",
       "      <th>对象年龄</th>\n",
       "      <th>对象身高</th>\n",
       "      <th>...</th>\n",
       "      <th>平均工资</th>\n",
       "      <th>对象最低年龄</th>\n",
       "      <th>对象最高年龄</th>\n",
       "      <th>对象平均年龄</th>\n",
       "      <th>对象最低身高</th>\n",
       "      <th>对象最高身高</th>\n",
       "      <th>对象平均身高</th>\n",
       "      <th>对象最低薪水</th>\n",
       "      <th>对象最高薪水</th>\n",
       "      <th>对象平均薪水</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>63</td>\n",
       "      <td>162</td>\n",
       "      <td>初中</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>河南省</td>\n",
       "      <td>离异</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>58 - 66</td>\n",
       "      <td>170 - 190</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>58</td>\n",
       "      <td>66</td>\n",
       "      <td>62.0</td>\n",
       "      <td>170</td>\n",
       "      <td>190</td>\n",
       "      <td>180.0</td>\n",
       "      <td>2000</td>\n",
       "      <td>5000</td>\n",
       "      <td>3500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>30</td>\n",
       "      <td>166</td>\n",
       "      <td>本科</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>浙江省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>已购房(有贷款)</td>\n",
       "      <td>\\n                    希望在百合遇见最好的归宿，如果你符合我的择偶要求...</td>\n",
       "      <td>35 - 49</td>\n",
       "      <td>166 - 191</td>\n",
       "      <td>...</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>35</td>\n",
       "      <td>49</td>\n",
       "      <td>42.0</td>\n",
       "      <td>166</td>\n",
       "      <td>191</td>\n",
       "      <td>178.5</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>47</td>\n",
       "      <td>159</td>\n",
       "      <td>中专/职高/技校</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>湖北省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...</td>\n",
       "      <td>47 - 55</td>\n",
       "      <td>167 - 178</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>47</td>\n",
       "      <td>55</td>\n",
       "      <td>51.0</td>\n",
       "      <td>167</td>\n",
       "      <td>178</td>\n",
       "      <td>172.5</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>33</td>\n",
       "      <td>168</td>\n",
       "      <td>本科</td>\n",
       "      <td>5000-10000</td>\n",
       "      <td>西藏自治区</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>18 - 48</td>\n",
       "      <td>157 - 190</td>\n",
       "      <td>...</td>\n",
       "      <td>7500.0</td>\n",
       "      <td>18</td>\n",
       "      <td>48</td>\n",
       "      <td>33.0</td>\n",
       "      <td>157</td>\n",
       "      <td>190</td>\n",
       "      <td>173.5</td>\n",
       "      <td>2000</td>\n",
       "      <td>5000</td>\n",
       "      <td>3500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219</th>\n",
       "      <td>62</td>\n",
       "      <td>160</td>\n",
       "      <td>高中</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>新疆自治区</td>\n",
       "      <td>丧偶</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    \\t\\t\\t\\t</td>\n",
       "      <td>55 - 65</td>\n",
       "      <td>173 - 211</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>55</td>\n",
       "      <td>65</td>\n",
       "      <td>60.0</td>\n",
       "      <td>173</td>\n",
       "      <td>211</td>\n",
       "      <td>192.0</td>\n",
       "      <td>10000</td>\n",
       "      <td>10000</td>\n",
       "      <td>10000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2854</th>\n",
       "      <td>50</td>\n",
       "      <td>168</td>\n",
       "      <td>中专/职高/技校</td>\n",
       "      <td>2000以下</td>\n",
       "      <td>江苏省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    等待，只为与你相遇，从相识、相爱、到遥远的未来。...</td>\n",
       "      <td>43 - 54</td>\n",
       "      <td>168 - 188</td>\n",
       "      <td>...</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>43</td>\n",
       "      <td>54</td>\n",
       "      <td>48.5</td>\n",
       "      <td>168</td>\n",
       "      <td>188</td>\n",
       "      <td>178.0</td>\n",
       "      <td>10000</td>\n",
       "      <td>20000</td>\n",
       "      <td>15000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2860</th>\n",
       "      <td>38</td>\n",
       "      <td>163</td>\n",
       "      <td>本科</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>辽宁省</td>\n",
       "      <td>离异</td>\n",
       "      <td>已购房(有贷款)</td>\n",
       "      <td>\\n                    　　婚姻有两种，一种叫搭伙，一种叫余生！</td>\n",
       "      <td>33 - 44</td>\n",
       "      <td>175 - 185</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>33</td>\n",
       "      <td>44</td>\n",
       "      <td>38.5</td>\n",
       "      <td>175</td>\n",
       "      <td>185</td>\n",
       "      <td>180.0</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000</td>\n",
       "      <td>7500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2866</th>\n",
       "      <td>51</td>\n",
       "      <td>160</td>\n",
       "      <td>本科</td>\n",
       "      <td>10000-20000</td>\n",
       "      <td>山东省</td>\n",
       "      <td>离异</td>\n",
       "      <td>已购房(无贷款)</td>\n",
       "      <td>\\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...</td>\n",
       "      <td>46 - 57</td>\n",
       "      <td>170 - 178</td>\n",
       "      <td>...</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>46</td>\n",
       "      <td>57</td>\n",
       "      <td>51.5</td>\n",
       "      <td>170</td>\n",
       "      <td>178</td>\n",
       "      <td>174.0</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2869</th>\n",
       "      <td>35</td>\n",
       "      <td>160</td>\n",
       "      <td>大专</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>福建省</td>\n",
       "      <td>未婚</td>\n",
       "      <td>以后再告诉你</td>\n",
       "      <td>\\n                    自我介绍：等待，只为与你相遇，从相识 相爱到遥远...</td>\n",
       "      <td>32 - 40</td>\n",
       "      <td>170 - 180</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>32</td>\n",
       "      <td>40</td>\n",
       "      <td>36.0</td>\n",
       "      <td>170</td>\n",
       "      <td>180</td>\n",
       "      <td>175.0</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000</td>\n",
       "      <td>20000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2873</th>\n",
       "      <td>41</td>\n",
       "      <td>153</td>\n",
       "      <td>中专/职高/技校</td>\n",
       "      <td>2000-5000</td>\n",
       "      <td>湖北省</td>\n",
       "      <td>离异</td>\n",
       "      <td>以后告诉你</td>\n",
       "      <td>\\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...</td>\n",
       "      <td>40 - 48</td>\n",
       "      <td>153 - 178</td>\n",
       "      <td>...</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>40</td>\n",
       "      <td>48</td>\n",
       "      <td>44.0</td>\n",
       "      <td>153</td>\n",
       "      <td>178</td>\n",
       "      <td>165.5</td>\n",
       "      <td>5000</td>\n",
       "      <td>10000</td>\n",
       "      <td>7500.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>545 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      年龄   身高        学历           工资     家乡  婚姻        住房  \\\n",
       "25    63  162        初中    2000-5000    河南省  离异  已购房(无贷款)   \n",
       "89    30  166        本科  10000-20000    浙江省  未婚  已购房(有贷款)   \n",
       "116   47  159  中专/职高/技校    2000-5000    湖北省  离异     以后告诉你   \n",
       "192   33  168        本科   5000-10000  西藏自治区  未婚     以后告诉你   \n",
       "219   62  160        高中    2000-5000  新疆自治区  丧偶    以后再告诉你   \n",
       "...   ..  ...       ...          ...    ...  ..       ...   \n",
       "2854  50  168  中专/职高/技校       2000以下    江苏省  离异    以后再告诉你   \n",
       "2860  38  163        本科    2000-5000    辽宁省  离异  已购房(有贷款)   \n",
       "2866  51  160        本科  10000-20000    山东省  离异  已购房(无贷款)   \n",
       "2869  35  160        大专    2000-5000    福建省  未婚    以后再告诉你   \n",
       "2873  41  153  中专/职高/技校    2000-5000    湖北省  离异     以后告诉你   \n",
       "\n",
       "                                                   自我介绍     对象年龄       对象身高  \\\n",
       "25    \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  58 - 66  170 - 190   \n",
       "89    \\n                    希望在百合遇见最好的归宿，如果你符合我的择偶要求...  35 - 49  166 - 191   \n",
       "116   \\n                    我希望自己不是这里的一个过客，盼望找到我爱的人，...  47 - 55  167 - 178   \n",
       "192   \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  18 - 48  157 - 190   \n",
       "219                      \\n                    \\t\\t\\t\\t  55 - 65  173 - 211   \n",
       "...                                                 ...      ...        ...   \n",
       "2854  \\n                    等待，只为与你相遇，从相识、相爱、到遥远的未来。...  43 - 54  168 - 188   \n",
       "2860         \\n                    　　婚姻有两种，一种叫搭伙，一种叫余生！  33 - 44  175 - 185   \n",
       "2866  \\n                    在百合，我希望自己不是这里的一个过客，而是盼望在...  46 - 57  170 - 178   \n",
       "2869  \\n                    自我介绍：等待，只为与你相遇，从相识 相爱到遥远...  32 - 40  170 - 180   \n",
       "2873  \\n                    在百合，我希望找到一个真诚，自信，积极，阳光的另...  40 - 48  153 - 178   \n",
       "\n",
       "      ...     平均工资 对象最低年龄 对象最高年龄 对象平均年龄 对象最低身高  对象最高身高  对象平均身高  对象最低薪水  \\\n",
       "25    ...   3500.0     58     66   62.0    170     190   180.0    2000   \n",
       "89    ...  15000.0     35     49   42.0    166     191   178.5   50000   \n",
       "116   ...   3500.0     47     55   51.0    167     178   172.5   50000   \n",
       "192   ...   7500.0     18     48   33.0    157     190   173.5    2000   \n",
       "219   ...   3500.0     55     65   60.0    173     211   192.0   10000   \n",
       "...   ...      ...    ...    ...    ...    ...     ...     ...     ...   \n",
       "2854  ...   2000.0     43     54   48.5    168     188   178.0   10000   \n",
       "2860  ...   3500.0     33     44   38.5    175     185   180.0    5000   \n",
       "2866  ...  15000.0     46     57   51.5    170     178   174.0   20000   \n",
       "2869  ...   3500.0     32     40   36.0    170     180   175.0   20000   \n",
       "2873  ...   3500.0     40     48   44.0    153     178   165.5    5000   \n",
       "\n",
       "      对象最高薪水   对象平均薪水  \n",
       "25      5000   3500.0  \n",
       "89     50000  50000.0  \n",
       "116    50000  50000.0  \n",
       "192     5000   3500.0  \n",
       "219    10000  10000.0  \n",
       "...      ...      ...  \n",
       "2854   20000  15000.0  \n",
       "2860   10000   7500.0  \n",
       "2866   20000  20000.0  \n",
       "2869   20000  20000.0  \n",
       "2873   10000   7500.0  \n",
       "\n",
       "[545 rows x 27 columns]"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose mean partner salary is greater than 3000.\n",
    "df[df['对象平均薪水'].gt(3000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12437.614678899083"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rough average salary over rows where the mean partner salary is above 0\n",
    "# (good to have a number in mind once I start working, hehe).\n",
    "indexs = df[df['对象平均薪水'] > 0].index\n",
    "df.loc[indexs, '对象平均薪水'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pretty good, haha"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 为做词云做准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Dumping model to file cache C:\\Users\\yujiaqi\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 2.442 seconds.\n",
      "Prefix dict has been built succesfully.\n"
     ]
    }
   ],
   "source": [
    "# Load the stop-word list (whitespace-separated tokens) and extend it with\n",
    "# extra noise words plus the space, newline and tab tokens.\n",
    "with open(r\"stopword.txt\",\"r\") as f:\n",
    "    stopword = f.read().split()\n",
    "stopword = stopword + [\"百合\",\"另一半\",\" \", '\\n', '一半', '相扶相持','\\t' ]\n",
    "# Set lookup is O(1) per token; `stopword` itself stays a list.\n",
    "stopword_set = set(stopword)\n",
    "\n",
    "# Drop the first and last two characters, lowercase, tokenise with jieba, remove stop words.\n",
    "# NOTE(review): the [2:-2] slice also cuts the final two characters of every text - confirm this is intended.\n",
    "df[\"自我介绍\"] = (\n",
    "    df[\"自我介绍\"].str[2:-2].str.lower()\n",
    "    .apply(jieba.lcut)\n",
    "    .apply(lambda tokens: [tok for tok in tokens if tok not in stopword_set])\n",
    ")\n",
    "# Introductions left with fewer than 6 tokens are marked as missing.\n",
    "df.loc[df[\"自我介绍\"].apply(lambda tokens: len(tokens) < 6), \"自我介绍\"] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['年龄', '身高', '学历', '工资', '家乡', '婚姻', '住房', '自我介绍', '对象年龄', '对象身高',\n",
       "       '对象学历', '对象薪水', '对象家乡', '对象婚姻', '对象住房', '最低工资', '最高工资', '平均工资',\n",
       "       '对象最低年龄', '对象最高年龄', '对象平均年龄', '对象最低身高', '对象最高身高', '对象平均身高', '对象最低薪水',\n",
       "       '对象最高薪水', '对象平均薪水'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 保存处理后的文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns written to the Excel file, in output order.\n",
    "feature = ['年龄', '身高', '学历', '工资', '家乡', '婚姻', '住房', '自我介绍', '对象年龄', '对象身高',\n",
    "       '对象学历', '对象薪水', '对象家乡', '对象婚姻', '对象住房', '最低工资', '最高工资', '平均工资',\n",
    "       '对象最低年龄', '对象最高年龄', '对象平均年龄', '对象最低身高', '对象最高身高', '对象平均身高', '对象最低薪水',\n",
    "       '对象最高薪水', '对象平均薪水']\n",
    "final_df = df[feature]\n",
    "# The `encoding` argument is omitted: it was ignored for .xlsx output and removed in pandas 2.0.\n",
    "# index=False leaves the row index out of the sheet.\n",
    "final_df.to_excel(r\"可视化.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
